Construct Single-Hierarchical P/NBD Model for CD-Now Transaction Data

Author

Mick Cooney

Published

December 7, 2023

In this workbook we construct the non-hierarchical P/NBD models on the synthetic data with the longer timeframe.

1 Load and Construct Datasets

1.1 Load CD-NOW Transaction Data

We now want to load the CD-NOW transaction data.

Code
customer_cohortdata_tbl <- read_rds("data/cdnow_customer_cohort_data_tbl.rds")
customer_cohortdata_tbl |> glimpse()
Rows: 23,570
Columns: 5
$ customer_id     <chr> "00001", "00002", "00003", "00004", "00005", "00006", …
$ cohort_qtr      <chr> "1997 Q1", "1997 Q1", "1997 Q1", "1997 Q1", "1997 Q1",…
$ cohort_ym       <chr> "1997 01", "1997 01", "1997 01", "1997 01", "1997 01",…
$ first_tnx_date  <date> 1997-01-01, 1997-01-12, 1997-01-02, 1997-01-01, 1997-…
$ total_tnx_count <int> 1, 2, 6, 4, 11, 1, 3, 8, 3, 1, 4, 1, 1, 1, 1, 4, 1, 1,…
Code
customer_transactions_tbl <- read_rds("data/cdnow_transaction_data_tbl.rds")
customer_transactions_tbl |> glimpse()
Rows: 69,659
Columns: 8
$ customer_id   <fct> 00001, 00002, 00002, 00003, 00003, 00003, 00003, 00003, …
$ tnx_date      <date> 1997-01-01, 1997-01-12, 1997-01-12, 1997-01-02, 1997-03…
$ tnx_timestamp <dttm> 1997-01-01 21:57:19, 1997-01-12 06:52:02, 1997-01-12 22…
$ tnx_dow       <fct> Wed, Sun, Sun, Thu, Sun, Wed, Sat, Tue, Thu, Wed, Sat, S…
$ tnx_month     <fct> Jan, Jan, Jan, Jan, Mar, Apr, Nov, Nov, May, Jan, Jan, A…
$ tnx_week      <chr> "00", "01", "01", "00", "12", "13", "45", "47", "21", "0…
$ cd_count      <int> 1, 5, 1, 2, 2, 2, 5, 4, 1, 2, 2, 1, 2, 2, 1, 3, 3, 3, 2,…
$ total_spend   <dbl> 11.77, 77.00, 12.00, 20.76, 20.76, 19.54, 57.45, 20.96, …
Code
customer_subset_id <- read_rds("data/cdnow_customer_subset_ids.rds")
customer_subset_id |> glimpse()
 Factor w/ 23570 levels "00030","00029",..: 14 18 61 64 76 81 96 162 164 202 ...

We re-produce the visualisation of the transaction times we used in previous workbooks.

Code
plot_tbl <- customer_transactions_tbl |>
  group_nest(customer_id, .key = "cust_data") |>
  filter(map_int(cust_data, nrow) > 3) |>
  slice_sample(n = 30) |>
  unnest(cust_data)

ggplot(plot_tbl, aes(x = tnx_timestamp, y = customer_id)) +
  geom_line() +
  geom_point() +
  labs(
      x = "Date",
      y = "Customer ID",
      title = "Visualisation of Customer Transaction Times"
    ) +
  theme(axis.text.y = element_text(size = 10))

1.2 Load Derived Data

Code
obs_fitdata_tbl   <- read_rds("data/cdnow_obs_fitdata_tbl.rds")
obs_validdata_tbl <- read_rds("data/cdnow_obs_validdata_tbl.rds")

customer_fit_stats_tbl <- obs_fitdata_tbl |>
  rename(x = tnx_count)

1.3 Load Subset Data

We also want to construct our data subsets for the purposes of speeding up our valuations.

Code
customer_fit_subset_tbl <- obs_fitdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_fit_subset_tbl |> glimpse()
Rows: 1,000
Columns: 6
$ customer_id    <fct> 00233, 00181, 00131, 00144, 00080, 00149, 00105, 00172,…
$ first_tnx_date <dttm> 1997-01-01 00:53:10, 1997-01-01 01:22:45, 1997-01-01 0…
$ last_tnx_date  <dttm> 1997-01-01 00:53:10, 1997-01-01 01:22:45, 1997-01-01 0…
$ tnx_count      <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 1, 0…
$ t_x            <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 3.244…
$ T_cal          <dbl> 52.13758, 52.13465, 52.10480, 52.10328, 52.09607, 52.09…
Code
customer_valid_subset_tbl <- obs_validdata_tbl |>
  filter(customer_id %in% customer_subset_id)

customer_valid_subset_tbl |> glimpse()
Rows: 1,000
Columns: 3
$ customer_id       <fct> 00233, 00181, 00131, 00144, 00080, 00149, 00105, 001…
$ tnx_count         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0…
$ tnx_last_interval <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 20.68985…

We now use these datasets to set the start and end dates for our various validation methods.

Code
dates_lst <- read_rds("data/cdnow_simulation_dates.rds")

use_fit_start_date <- dates_lst$use_fit_start_date
use_fit_end_date   <- dates_lst$use_fit_end_date

use_valid_start_date <- dates_lst$use_valid_start_date
use_valid_end_date   <- dates_lst$use_valid_end_date

We now split out the transaction data into fit and validation datasets.

Code
customer_fit_transactions_tbl <- customer_transactions_tbl |>
  filter(
    customer_id %in% customer_subset_id,
    tnx_timestamp >= use_fit_start_date,
    tnx_timestamp <= use_fit_end_date
    )
  
customer_fit_transactions_tbl |> glimpse()
Rows: 2,281
Columns: 8
$ customer_id   <fct> 00013, 00016, 00016, 00016, 00016, 00080, 00105, 00131, …
$ tnx_date      <date> 1997-01-01, 1997-01-01, 1997-07-13, 1997-07-26, 1997-09…
$ tnx_timestamp <dttm> 1997-01-01 22:58:54, 1997-01-01 23:18:11, 1997-07-13 14…
$ tnx_dow       <fct> Wed, Wed, Sun, Sat, Wed, Wed, Wed, Wed, Wed, Wed, Fri, W…
$ tnx_month     <fct> Jan, Jan, Jul, Jul, Sep, Jan, Jan, Jan, Jan, Jan, Jan, J…
$ tnx_week      <chr> "00", "00", "27", "29", "36", "00", "00", "00", "00", "0…
$ cd_count      <int> 4, 1, 2, 4, 1, 4, 3, 2, 2, 4, 4, 1, 5, 3, 1, 1, 1, 2, 1,…
$ total_spend   <dbl> 72.94, 9.77, 31.60, 28.51, 9.99, 43.46, 35.31, 30.32, 30…
Code
customer_valid_transactions_tbl <- customer_transactions_tbl |>
  filter(
    customer_id %in% customer_subset_id,
    tnx_timestamp >= use_valid_start_date,
    tnx_timestamp <= use_valid_end_date
    )
  
customer_valid_transactions_tbl |> glimpse()
Rows: 473
Columns: 8
$ customer_id   <fct> 00360, 00360, 00360, 00559, 00663, 00663, 00663, 00663, …
$ tnx_date      <date> 1998-02-12, 1998-05-01, 1998-05-25, 1998-06-01, 1998-03…
$ tnx_timestamp <dttm> 1998-02-12 07:30:09, 1998-05-01 23:41:09, 1998-05-25 19…
$ tnx_dow       <fct> Thu, Fri, Mon, Mon, Mon, Tue, Thu, Sun, Mon, Fri, Mon, S…
$ tnx_month     <fct> Feb, May, May, Jun, Mar, Apr, May, May, Jun, Jun, Jun, F…
$ tnx_week      <chr> "06", "17", "21", "22", "13", "17", "18", "18", "24", "2…
$ cd_count      <int> 1, 1, 2, 4, 3, 3, 1, 1, 1, 2, 2, 2, 2, 1, 2, 4, 8, 4, 8,…
$ total_spend   <dbl> 11.49, 11.49, 23.87, 54.85, 27.47, 36.97, 11.88, 9.49, 1…

Finally, we want to extract the first transaction for each customer, so we can add this data to assess our models.

Code
customer_initial_tnx_tbl <- customer_fit_transactions_tbl |>
  slice_min(n = 1, order_by = tnx_timestamp, by = customer_id)

customer_initial_tnx_tbl |> glimpse()
Rows: 1,000
Columns: 8
$ customer_id   <fct> 00013, 00016, 00080, 00105, 00131, 00144, 00149, 00168, …
$ tnx_date      <date> 1997-01-01, 1997-01-01, 1997-01-01, 1997-01-01, 1997-01…
$ tnx_timestamp <dttm> 1997-01-01 22:58:54, 1997-01-01 23:18:11, 1997-01-01 07…
$ tnx_dow       <fct> Wed, Wed, Wed, Wed, Wed, Wed, Wed, Wed, Wed, Wed, Wed, T…
$ tnx_month     <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, J…
$ tnx_week      <chr> "00", "00", "00", "00", "00", "00", "00", "00", "00", "0…
$ cd_count      <int> 4, 1, 4, 3, 2, 2, 4, 1, 5, 3, 1, 1, 1, 2, 1, 2, 1, 1, 2,…
$ total_spend   <dbl> 72.94, 9.77, 43.46, 35.31, 30.32, 30.72, 54.87, 14.96, 5…

We now expand out these initial transactions so that we can append them to our simulations.

Code
sim_init_tbl <- customer_initial_tnx_tbl |>
  transmute(
    customer_id,
    draw_id       = list(1:n_sim),
    tnx_timestamp,
    tnx_amount    = total_spend
    ) |>
  unnest(draw_id)

sim_init_tbl |> glimpse()
Rows: 2,000,000
Columns: 4
$ customer_id   <fct> 00013, 00013, 00013, 00013, 00013, 00013, 00013, 00013, …
$ draw_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
$ tnx_timestamp <dttm> 1997-01-01 22:58:54, 1997-01-01 22:58:54, 1997-01-01 22…
$ tnx_amount    <dbl> 72.94, 72.94, 72.94, 72.94, 72.94, 72.94, 72.94, 72.94, …

Before we start on that, we set a few parameters for the workbook to organise our Stan code.

Code
stan_modeldir <- "stan_models"
stan_codedir  <-   "stan_code"

2 Fit First Hierarchical Lambda Model

Our first hierarchical model puts a hierarchical prior around the mean of our population \(\lambda\) - lambda_mn.

Once again we use a Gamma prior for it.

2.1 Compile and Fit Stan Model

We now compile this model using CmdStanR.

Code
pnbd_onehierlambda_stanmodel <- cmdstan_model(
  "stan_code/pnbd_onehier_lambda.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

We then use this compiled model with our data to produce a fit of the data.

Code
stan_modelname <- "pnbd_cdnow_onehierlambda1"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_lambda_mn_p1 = 0.25,
    hier_lambda_mn_p2 = 1,

    lambda_cv = 1.00,
    
    mu_mn     = 0.10,
    mu_cv     = 1.00,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_cdnow_onehierlambda1_stanfit <- pnbd_onehierlambda_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_cdnow_onehierlambda1_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  message(glue("Found file {stanfit_object_file}. Loading..."))
  
  pnbd_cdnow_onehierlambda1_stanfit <- read_rds(stanfit_object_file)
}
Running MCMC with 4 chains, at most 12 in parallel...

Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 3 finished in 2031.3 seconds.
Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 1 finished in 2044.1 seconds.
Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 2 finished in 2063.9 seconds.
Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 4 finished in 2068.2 seconds.

All 4 chains finished successfully.
Mean chain execution time: 2051.9 seconds.
Total execution time: 2068.7 seconds.
Code
pnbd_cdnow_onehierlambda1_stanfit$print()
  variable       mean     median     sd    mad         q5        q95 rhat
 lp__      -247556.09 -247558.00 175.32 183.84 -247848.00 -247266.95 1.00
 lambda_mn       0.07       0.07   0.00   0.00       0.06       0.07 1.00
 lambda[1]       0.07       0.06   0.05   0.04       0.01       0.18 1.00
 lambda[2]       0.17       0.16   0.05   0.05       0.09       0.26 1.00
 lambda[3]       0.06       0.06   0.03   0.03       0.02       0.12 1.00
 lambda[4]       0.03       0.03   0.02   0.02       0.00       0.07 1.00
 lambda[5]       0.04       0.03   0.03   0.03       0.01       0.10 1.00
 lambda[6]       0.10       0.09   0.06   0.05       0.02       0.21 1.00
 lambda[7]       0.05       0.04   0.03   0.02       0.01       0.09 1.00
 lambda[8]       0.06       0.05   0.03   0.03       0.02       0.12 1.00
 ess_bulk ess_tail
      646      865
     1660     1701
     2404     1127
     3293     1313
     3093     1252
     2756     1144
     3271     1517
     2956     1514
     3484     1125
     3628     1491

 # showing 10 of 70713 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_cdnow_onehierlambda1_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda1-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda1-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda1-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda1-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

2.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_cdnow_onehierlambda1_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
pnbd_cdnow_onehierlambda1_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_cdnow_onehierlambda1_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

2.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_cdnow_onehierlambda1_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_cdnow_onehierlambda1_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_cdnow_onehierlambda1",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 2110
  )

pnbd_cdnow_onehierlambda1_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_cdnow_onehierlambda1_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_cdnow_onehierlambda1_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_cdnow_onehierlambda1_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_cdnow_onehierlambda1_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_cdnow_onehierlambda1_assess_valid_simstats_tbl.rds"

2.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehierlambda1_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

2.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehierlambda1_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

3 Fit Second Hierarchical Lambda Model

In this model, we are going with a broadly similar model but we are instead using a different mean for our hierarchical prior.

3.1 Fit Stan Model

We now want to fit the model to our data using this alternative prior for lambda_mn.

Code
stan_modelname <- "pnbd_cdnow_onehierlambda2"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_lambda_mn_p1 = 0.50,
    hier_lambda_mn_p2 = 1,

    lambda_cv = 1.00,
    
    mu_mn     = 0.10,
    mu_cv     = 1.00,
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_cdnow_onehierlambda2_stanfit <- pnbd_onehierlambda_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_cdnow_onehierlambda2_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  message(glue("Found file {stanfit_object_file}. Loading..."))
  
  pnbd_cdnow_onehierlambda2_stanfit <- read_rds(stanfit_object_file)
}
Running MCMC with 4 chains, at most 12 in parallel...

Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 4 finished in 2123.5 seconds.
Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 3 finished in 2170.2 seconds.
Chain 2 finished in 2170.7 seconds.
Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 1 finished in 2199.9 seconds.

All 4 chains finished successfully.
Mean chain execution time: 2166.1 seconds.
Total execution time: 2201.3 seconds.
Code
pnbd_cdnow_onehierlambda2_stanfit$print()
  variable       mean     median     sd    mad         q5        q95 rhat
 lp__      -247550.81 -247556.00 170.70 173.46 -247825.05 -247265.00 1.01
 lambda_mn       0.07       0.07   0.00   0.00       0.06       0.07 1.00
 lambda[1]       0.07       0.05   0.05   0.04       0.01       0.17 1.00
 lambda[2]       0.17       0.16   0.05   0.05       0.10       0.26 1.01
 lambda[3]       0.06       0.06   0.03   0.03       0.02       0.12 1.00
 lambda[4]       0.03       0.03   0.02   0.02       0.00       0.07 1.00
 lambda[5]       0.04       0.04   0.03   0.03       0.01       0.11 1.00
 lambda[6]       0.10       0.08   0.06   0.05       0.02       0.21 1.00
 lambda[7]       0.05       0.04   0.03   0.02       0.01       0.10 1.00
 lambda[8]       0.06       0.05   0.03   0.03       0.02       0.12 1.00
 ess_bulk ess_tail
      634      988
     1470     1554
     3128     1406
     3247     1618
     3618     1452
     2358      912
     3406     1232
     3851     1259
     3604     1305
     3814     1173

 # showing 10 of 70713 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_cdnow_onehierlambda2_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda2-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda2-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda2-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehierlambda2-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

3.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "lambda_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_cdnow_onehierlambda2_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
pnbd_cdnow_onehierlambda2_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_cdnow_onehierlambda2_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

3.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_cdnow_onehierlambda2_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_cdnow_onehierlambda2_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_cdnow_onehierlambda2",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 2110
  )

pnbd_cdnow_onehierlambda2_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_cdnow_onehierlambda2_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_cdnow_onehierlambda2_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_cdnow_onehierlambda2_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_cdnow_onehierlambda2_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_cdnow_onehierlambda2_assess_valid_simstats_tbl.rds"

3.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehierlambda2_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

3.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehierlambda2_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

4 Fit First Hierarchical Mu Model

We now construct the same hierarchical model but based around mu_mn.

4.1 Compile and Fit Stan Model

We compile this model using CmdStanR.

Code
pnbd_onehiermu_stanmodel <- cmdstan_model(
  "stan_code/pnbd_onehier_mu.stan",
  include_paths =   stan_codedir,
  pedantic      =           TRUE,
  dir           =  stan_modeldir
  )

We then use this compiled model with our data to produce a fit of the data.

Code
stan_modelname <- "pnbd_cdnow_onehiermu1"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_mu_mn_p1 = 0.50,
    hier_mu_mn_p2 = 1.00,

    lambda_mn     = 0.25,
    lambda_cv     = 1.00,

    mu_cv         = 1.00

    )

if(!file_exists(stanfit_object_file)) {
  pnbd_cdnow_onehiermu1_stanfit <- pnbd_onehiermu_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_cdnow_onehiermu1_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  message(glue("Found file {stanfit_object_file}. Loading..."))
  
  pnbd_cdnow_onehiermu1_stanfit <- read_rds(stanfit_object_file)
}
Running MCMC with 4 chains, at most 12 in parallel...

Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 3 finished in 1354.8 seconds.
Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 4 finished in 1360.2 seconds.
Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 1 finished in 1377.4 seconds.
Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 2 finished in 1405.9 seconds.

All 4 chains finished successfully.
Mean chain execution time: 1374.6 seconds.
Total execution time: 1406.5 seconds.
Code
pnbd_cdnow_onehiermu1_stanfit$print()
  variable       mean     median     sd    mad         q5        q95 rhat
 lp__      -232783.62 -232782.00 170.13 170.50 -233050.05 -232510.00 1.01
 mu_mn           0.11       0.11   0.00   0.00       0.11       0.12 1.01
 lambda[1]       0.14       0.11   0.11   0.09       0.02       0.36 1.00
 lambda[2]       0.20       0.19   0.06   0.06       0.11       0.31 1.00
 lambda[3]       0.07       0.07   0.04   0.03       0.03       0.14 1.00
 lambda[4]       0.04       0.03   0.03   0.02       0.01       0.09 1.00
 lambda[5]       0.06       0.05   0.05   0.04       0.01       0.15 1.00
 lambda[6]       0.17       0.15   0.11   0.10       0.04       0.38 1.00
 lambda[7]       0.06       0.05   0.03   0.03       0.01       0.12 1.00
 lambda[8]       0.07       0.06   0.04   0.03       0.02       0.14 1.00
 ess_bulk ess_tail
      650     1019
      648      979
     3004     1287
     3662     1364
     2978     1294
     3009     1112
     2989     1066
     2913     1316
     3925     1141
     3240     1408

 # showing 10 of 70713 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_cdnow_onehiermu1_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu1-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu1-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu1-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu1-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

4.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "mu_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_cdnow_onehiermu1_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
pnbd_cdnow_onehiermu1_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_cdnow_onehiermu1_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

4.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_cdnow_onehiermu1_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_cdnow_onehiermu1_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_cdnow_onehiermu1",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 2110
  )

pnbd_cdnow_onehiermu1_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_cdnow_onehiermu1_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_cdnow_onehiermu1_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_cdnow_onehiermu1_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_cdnow_onehiermu1_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_cdnow_onehiermu1_assess_valid_simstats_tbl.rds"

4.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehiermu1_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

4.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehiermu1_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

5 Fit Second Hierarchical Mu Model

In this model, we are going with a broadly similar model but we are instead using a different mean for our hierarchical prior.

5.1 Fit Stan Model

We now want to fit the model to our data using this alternative prior for lambda_mn.

Code
stan_modelname <- "pnbd_cdnow_onehiermu2"
stanfit_prefix <- str_c("fit_", stan_modelname)
stanfit_seed   <- stanfit_seed + 1

stanfit_object_file <- glue("data/{stanfit_prefix}_stanfit.rds")


stan_data_lst <- customer_fit_stats_tbl |>
  select(customer_id, x, t_x, T_cal) |>
  compose_data(
    hier_mu_mn_p1 = 0.25,
    hier_mu_mn_p2 = 1.00,

    lambda_mn     = 0.25,
    lambda_cv     = 1.00,

    mu_cv         = 1.00
    )

if(!file_exists(stanfit_object_file)) {
  pnbd_cdnow_onehiermu2_stanfit <- pnbd_onehiermu_stanmodel$sample(
    data            =                stan_data_lst,
    chains          =                            4,
    iter_warmup     =                          500,
    iter_sampling   =                          500,
    seed            =                 stanfit_seed,
    save_warmup     =                         TRUE,
    output_dir      =                stan_modeldir,
    output_basename =               stanfit_prefix,
    )
  
  pnbd_cdnow_onehiermu2_stanfit$save_object(stanfit_object_file, compress = "gzip")

} else {
  message(glue("Found file {stanfit_object_file}. Loading..."))
  
  pnbd_cdnow_onehiermu2_stanfit <- read_rds(stanfit_object_file)
}
Running MCMC with 4 chains, at most 12 in parallel...

Chain 1 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 2 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 3 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration:   1 / 1000 [  0%]  (Warmup) 
Chain 4 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 3 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 1 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 2 Iteration: 100 / 1000 [ 10%]  (Warmup) 
Chain 4 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 3 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 1 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 2 Iteration: 200 / 1000 [ 20%]  (Warmup) 
Chain 4 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 3 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 1 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 2 Iteration: 300 / 1000 [ 30%]  (Warmup) 
Chain 4 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 3 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 1 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 2 Iteration: 400 / 1000 [ 40%]  (Warmup) 
Chain 4 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 4 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 3 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 3 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 2 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 2 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 1 Iteration: 500 / 1000 [ 50%]  (Warmup) 
Chain 1 Iteration: 501 / 1000 [ 50%]  (Sampling) 
Chain 4 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 3 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 2 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 1 Iteration: 600 / 1000 [ 60%]  (Sampling) 
Chain 4 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 3 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 2 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 1 Iteration: 700 / 1000 [ 70%]  (Sampling) 
Chain 4 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 3 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 2 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 1 Iteration: 800 / 1000 [ 80%]  (Sampling) 
Chain 4 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 3 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 2 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 1 Iteration: 900 / 1000 [ 90%]  (Sampling) 
Chain 4 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 4 finished in 1522.6 seconds.
Chain 3 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 3 finished in 1528.9 seconds.
Chain 2 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 2 finished in 1555.4 seconds.
Chain 1 Iteration: 1000 / 1000 [100%]  (Sampling) 
Chain 1 finished in 1557.4 seconds.

All 4 chains finished successfully.
Mean chain execution time: 1541.1 seconds.
Total execution time: 6347.5 seconds.
Code
pnbd_cdnow_onehiermu2_stanfit$print()
  variable       mean     median     sd    mad         q5        q95 rhat
 lp__      -232771.42 -232772.00 175.83 169.02 -233051.05 -232476.90 1.00
 mu_mn           0.11       0.11   0.00   0.00       0.11       0.12 1.00
 lambda[1]       0.14       0.11   0.11   0.09       0.02       0.35 1.00
 lambda[2]       0.20       0.20   0.06   0.06       0.12       0.31 1.00
 lambda[3]       0.07       0.07   0.04   0.03       0.02       0.14 1.00
 lambda[4]       0.04       0.03   0.03   0.02       0.01       0.09 1.00
 lambda[5]       0.06       0.05   0.05   0.04       0.01       0.16 1.00
 lambda[6]       0.17       0.15   0.11   0.09       0.04       0.38 1.00
 lambda[7]       0.06       0.05   0.03   0.03       0.02       0.11 1.01
 lambda[8]       0.07       0.06   0.04   0.03       0.02       0.14 1.00
 ess_bulk ess_tail
      593     1081
      608     1047
     2390      977
     4559     1465
     2878      992
     3176     1287
     3609     1406
     2957     1076
     4059     1196
     3798      988

 # showing 10 of 70713 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)

We have some basic HMC-based validity statistics we can check.

Code
pnbd_cdnow_onehiermu2_stanfit$cmdstan_diagnose()
Processing csv files: /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu2-1.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu2-2.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu2-3.csvWarning: non-fatal error reading adaptation data
, /home/rstudio/btydwork/stan_models/fit_pnbd_cdnow_onehiermu2-4.csvWarning: non-fatal error reading adaptation data


Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
No divergent transitions found.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete, no problems detected.

5.2 Visual Diagnostics of the Sample Validity

Now that we have a sample from the posterior distribution we need to create a few different visualisations of the diagnostics.

Code
parameter_subset <- c(
  "mu_mn",
  "lambda[1]", "lambda[2]", "lambda[3]", "lambda[4]",
  "mu[1]",     "mu[2]",     "mu[3]",     "mu[4]"
  )

pnbd_cdnow_onehiermu2_stanfit$draws(inc_warmup = FALSE) |>
  mcmc_trace(pars = parameter_subset) +
  expand_limits(y = 0) +
  labs(
    x = "Iteration",
    y = "Value",
    title = "Traceplot of Sample of Lambda and Mu Values"
    ) +
  theme(axis.text.x = element_text(size = 10))

We also check \(N_{eff}\) as a quick diagnostic of the fit.

Code
pnbd_cdnow_onehiermu2_stanfit |>
  neff_ratio(pars = c("lambda", "mu")) |>
  mcmc_neff() +
    ggtitle("Plot of Parameter Effective Sample Sizes")

Finally, we want to check out the energy diagnostic, which is often indicative of problems with the posterior mixing.

Code
pnbd_cdnow_onehiermu2_stanfit |>
  nuts_params() |>
  mcmc_nuts_energy(binwidth = 50)

5.3 Assess the Model

As we intend to run the same logic to assess each of our models, we have combined all this logic into a single function run_model_assessment, to run the simulations and combine the datasets.

Code
pnbd_stanfit <- pnbd_cdnow_onehiermu2_stanfit |>
  recover_types(customer_fit_stats_tbl)

pnbd_cdnow_onehiermu2_assess_data_lst <- run_model_assessment(
  model_stanfit    = pnbd_stanfit,
  insample_tbl     = customer_fit_subset_tbl,
  fit_label        = "pnbd_cdnow_onehiermu2",
  fit_end_dttm     = use_fit_end_date     |> as.POSIXct(),
  valid_start_dttm = use_valid_start_date |> as.POSIXct(),
  valid_end_dttm   = use_valid_end_date   |> as.POSIXct(),
  sim_seed         = 2110
  )

pnbd_cdnow_onehiermu2_assess_data_lst |> glimpse()
List of 5
 $ model_fit_index_filepath     : 'glue' chr "data/pnbd_cdnow_onehiermu2_assess_fit_index_tbl.rds"
 $ model_valid_index_filepath   : 'glue' chr "data/pnbd_cdnow_onehiermu2_assess_valid_index_tbl.rds"
 $ model_simstats_filepath      : 'glue' chr "data/pnbd_cdnow_onehiermu2_assess_model_simstats_tbl.rds"
 $ model_fit_simstats_filepath  : 'glue' chr "data/pnbd_cdnow_onehiermu2_assess_fit_simstats_tbl.rds"
 $ model_valid_simstats_filepath: 'glue' chr "data/pnbd_cdnow_onehiermu2_assess_valid_simstats_tbl.rds"

5.3.1 Check In-Sample Data Validation

We first check the model against the in-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehiermu2_assess_data_lst |>
  use_series(model_fit_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  bind_rows(sim_init_tbl) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_fit_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

This fit looks reasonable and appears to capture most of the aspects of the data used to fit it. Given that this is a synthetic dataset, this is not surprising, but at least we appreciate that our model is valid.

5.3.2 Check Out-of-Sample Data Validation

We now repeat for the out-of-sample data.

Code
simdata_tbl <- pnbd_cdnow_onehiermu2_assess_data_lst |>
  use_series(model_valid_index_filepath) |>
  read_rds() |>
  use_series(sim_file) |>
  map_dfr(read_rds) |>
  select(customer_id, draw_id, sim_data) |>
  unnest(sim_data) |>
  arrange(customer_id, draw_id, tnx_timestamp)


assess_plots_lst <- create_model_assessment_plots(
  obsdata_tbl = customer_valid_transactions_tbl,
  simdata_tbl = simdata_tbl
  )

assess_plots_lst |> map(print)

$total_plot


$quant_plot

As for our short time frame data, overall our model is working well.

6 Compare Model Outputs

We have looked at each of the models individually, but it is also worth looking at each of the models as a group.

We now want to combine both the fit and valid transaction sets to calculate the summary statistics for both.

Code
obs_summstats_tbl <- list(
    fit   = customer_fit_transactions_tbl,
    valid = customer_valid_transactions_tbl
    ) |>
  bind_rows(.id = "assess_type") |>
  group_by(assess_type) |>
  calculate_transaction_summary_statistics() |>
  pivot_longer(
    cols      = !assess_type,
    names_to  = "label",
    values_to = "obs_value"
    )

obs_summstats_tbl |> glimpse()
Rows: 16
Columns: 3
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "v…
$ label       <chr> "p10", "p25", "p50", "p75", "p90", "p99", "total_count", "…
$ obs_value   <dbl> 1.000000, 1.000000, 1.000000, 3.000000, 4.100000, 14.00000…
Code
model_assess_transactions_tbl <- dir_ls("data", regexp = "pnbd_cdnow_(fixed|one).*_assess_.*index") |>
  enframe(name = NULL, value = "file_path") |>
  mutate(
    model_label = str_replace(file_path, "data/pnbd_cdnow_(.*?)_assess_.*", "\\1"),
    assess_type = if_else(str_detect(file_path, "_assess_fit_"), "fit", "valid"),
    
    assess_data = map(
      file_path, construct_model_assessment_data,
      
      .progress = "construct_assess_data"
      )
    ) |>
  select(model_label, assess_type, assess_data) |>
  unnest(assess_data)

model_assess_transactions_tbl |> glimpse()
Rows: 31,882,590
Columns: 6
$ model_label   <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed…
$ assess_type   <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", …
$ customer_id   <fct> 00233, 00233, 00233, 00233, 00233, 00233, 00233, 00233, …
$ draw_id       <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5,…
$ tnx_timestamp <dttm> 1997-01-07 17:19:02, 1997-01-08 05:04:59, 1997-01-11 17…
$ tnx_amount    <dbl> 647.50, 59.53, 9.24, 3.45, 33.03, 82.41, 35.28, 2.12, 10…

We now want to calculate the transaction statistics on this full dataset, for each separate draw.

Code
model_assess_tbl <- model_assess_transactions_tbl |>
  group_by(model_label, assess_type, draw_id) |>
  calculate_transaction_summary_statistics()

model_assess_tbl |> glimpse()
Rows: 32,000
Columns: 11
$ model_label <chr> "fixed1", "fixed1", "fixed1", "fixed1", "fixed1", "fixed1"…
$ assess_type <chr> "fit", "fit", "fit", "fit", "fit", "fit", "fit", "fit", "f…
$ draw_id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…
$ p10         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p25         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ p50         <dbl> 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
$ p75         <dbl> 5, 5, 5, 5, 4, 5, 5, 5, 4, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5…
$ p90         <dbl> 9.0, 8.1, 8.5, 8.0, 8.0, 8.0, 9.0, 8.0, 9.0, 8.0, 8.0, 9.3…
$ p99         <dbl> 30.92, 21.91, 21.55, 22.60, 21.16, 20.00, 20.00, 27.00, 19…
$ total_count <int> 2313, 1946, 2184, 2073, 2058, 1980, 2199, 2199, 1998, 2058…
$ mean_count  <dbl> 4.307263, 3.815686, 4.000000, 3.831793, 3.790055, 3.659889…

We now combine all this data to create a number of different comparison plots for the various summary statistics.

Code
#! echo: TRUE

create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "total_count", "Total Transactions"
  )

Code
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "mean_count", "Average Transactions per Customer"
  )

Code
create_multiple_model_assessment_plot(
  obs_summstats_tbl, model_assess_tbl,
  "p99", "99th Percentile Count"
  )

6.1 Write Assessment Data to Disk

We now want to save the assessment data to disk.

Code
model_assess_tbl |> write_rds("data/assess_data_pnbd_cdnow_onehier_tbl.rds")

R Environment

Code
options(width = 120L)
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16)
 os       Ubuntu 22.04.3 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       Europe/Dublin
 date     2023-12-07
 pandoc   3.1.1 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package        * version    date (UTC) lib source
 abind            1.4-5      2016-07-21 [1] RSPM (R 4.3.0)
 arrayhelpers     1.1-0      2020-02-04 [1] RSPM (R 4.3.0)
 backports        1.4.1      2021-12-13 [1] RSPM (R 4.3.0)
 base64enc        0.1-3      2015-07-28 [1] RSPM (R 4.3.0)
 bayesplot      * 1.10.0     2022-11-16 [1] RSPM (R 4.3.0)
 bit              4.0.5      2022-11-15 [1] RSPM (R 4.3.0)
 bit64            4.0.5      2020-08-30 [1] RSPM (R 4.3.0)
 bridgesampling   1.1-2      2021-04-16 [1] RSPM (R 4.3.0)
 brms           * 2.20.4     2023-09-25 [1] RSPM (R 4.3.0)
 Brobdingnag      1.2-9      2022-10-19 [1] RSPM (R 4.3.0)
 cachem           1.0.8      2023-05-01 [1] RSPM (R 4.3.0)
 callr            3.7.3      2022-11-02 [1] RSPM (R 4.3.0)
 checkmate        2.3.0      2023-10-25 [1] RSPM (R 4.3.0)
 cli              3.6.1      2023-03-23 [1] RSPM (R 4.3.0)
 cmdstanr       * 0.6.0.9000 2023-11-21 [1] Github (stan-dev/cmdstanr@a13c798)
 coda             0.19-4     2020-09-30 [1] RSPM (R 4.3.0)
 codetools        0.2-19     2023-02-01 [2] CRAN (R 4.3.1)
 colorspace       2.1-0      2023-01-23 [1] RSPM (R 4.3.0)
 colourpicker     1.3.0      2023-08-21 [1] RSPM (R 4.3.0)
 conflicted     * 1.2.0      2023-02-01 [1] RSPM (R 4.3.0)
 cowplot        * 1.1.1      2020-12-30 [1] RSPM (R 4.3.0)
 crayon           1.5.2      2022-09-29 [1] RSPM (R 4.3.0)
 crosstalk        1.2.0      2021-11-04 [1] RSPM (R 4.3.0)
 curl             5.1.0      2023-10-02 [1] RSPM (R 4.3.0)
 data.table       1.14.8     2023-02-17 [1] RSPM (R 4.3.0)
 digest           0.6.33     2023-07-07 [1] RSPM (R 4.3.0)
 directlabels   * 2023.8.25  2023-09-01 [1] RSPM (R 4.3.0)
 distributional   0.3.2      2023-03-22 [1] RSPM (R 4.3.0)
 dplyr          * 1.1.3      2023-09-03 [1] RSPM (R 4.3.0)
 DT               0.30       2023-10-05 [1] RSPM (R 4.3.0)
 dygraphs         1.1.1.6    2018-07-11 [1] RSPM (R 4.3.0)
 ellipsis         0.3.2      2021-04-29 [1] RSPM (R 4.3.0)
 evaluate         0.22       2023-09-29 [1] RSPM (R 4.3.0)
 fansi            1.0.5      2023-10-08 [1] RSPM (R 4.3.0)
 farver           2.1.1      2022-07-06 [1] RSPM (R 4.3.0)
 fastmap          1.1.1      2023-02-24 [1] RSPM (R 4.3.0)
 forcats        * 1.0.0      2023-01-29 [1] RSPM (R 4.3.0)
 fs             * 1.6.3      2023-07-20 [1] RSPM (R 4.3.0)
 furrr          * 0.3.1      2022-08-15 [1] RSPM (R 4.3.0)
 future         * 1.33.0     2023-07-01 [1] RSPM (R 4.3.0)
 generics         0.1.3      2022-07-05 [1] RSPM (R 4.3.0)
 ggdist           3.3.0      2023-05-13 [1] RSPM (R 4.3.0)
 ggplot2        * 3.4.4      2023-10-12 [1] RSPM (R 4.3.0)
 globals          0.16.2     2022-11-21 [1] RSPM (R 4.3.0)
 glue           * 1.6.2      2022-02-24 [1] RSPM (R 4.3.0)
 gridExtra        2.3        2017-09-09 [1] RSPM (R 4.3.0)
 gtable           0.3.4      2023-08-21 [1] RSPM (R 4.3.0)
 gtools           3.9.4      2022-11-27 [1] RSPM (R 4.3.0)
 hms              1.1.3      2023-03-21 [1] RSPM (R 4.3.0)
 htmltools        0.5.6.1    2023-10-06 [1] RSPM (R 4.3.0)
 htmlwidgets      1.6.2      2023-03-17 [1] RSPM (R 4.3.0)
 httpuv           1.6.12     2023-10-23 [1] RSPM (R 4.3.0)
 igraph           1.5.1      2023-08-10 [1] RSPM (R 4.3.0)
 inline           0.3.19     2021-05-31 [1] RSPM (R 4.3.0)
 jsonlite         1.8.7      2023-06-29 [1] RSPM (R 4.3.0)
 knitr            1.44       2023-09-11 [1] RSPM (R 4.3.0)
 labeling         0.4.3      2023-08-29 [1] RSPM (R 4.3.0)
 later            1.3.1      2023-05-02 [1] RSPM (R 4.3.0)
 lattice          0.21-8     2023-04-05 [2] CRAN (R 4.3.1)
 lifecycle        1.0.3      2022-10-07 [1] RSPM (R 4.3.0)
 listenv          0.9.0      2022-12-16 [1] RSPM (R 4.3.0)
 loo              2.6.0      2023-03-31 [1] RSPM (R 4.3.0)
 lubridate      * 1.9.3      2023-09-27 [1] RSPM (R 4.3.0)
 magrittr       * 2.0.3      2022-03-30 [1] RSPM (R 4.3.0)
 markdown         1.11       2023-10-19 [1] RSPM (R 4.3.0)
 Matrix           1.5-4.1    2023-05-18 [2] CRAN (R 4.3.1)
 matrixStats      1.0.0      2023-06-02 [1] RSPM (R 4.3.0)
 memoise          2.0.1      2021-11-26 [1] RSPM (R 4.3.0)
 mime             0.12       2021-09-28 [1] RSPM (R 4.3.0)
 miniUI           0.1.1.1    2018-05-18 [1] RSPM (R 4.3.0)
 munsell          0.5.0      2018-06-12 [1] RSPM (R 4.3.0)
 mvtnorm          1.2-3      2023-08-25 [1] RSPM (R 4.3.0)
 nlme             3.1-162    2023-01-31 [2] CRAN (R 4.3.1)
 parallelly       1.36.0     2023-05-26 [1] RSPM (R 4.3.0)
 pillar           1.9.0      2023-03-22 [1] RSPM (R 4.3.0)
 pkgbuild         1.4.2      2023-06-26 [1] RSPM (R 4.3.0)
 pkgconfig        2.0.3      2019-09-22 [1] RSPM (R 4.3.0)
 plyr             1.8.9      2023-10-02 [1] RSPM (R 4.3.0)
 posterior      * 1.4.1      2023-03-14 [1] RSPM (R 4.3.0)
 prettyunits      1.2.0      2023-09-24 [1] RSPM (R 4.3.0)
 processx         3.8.2      2023-06-30 [1] RSPM (R 4.3.0)
 promises         1.2.1      2023-08-10 [1] RSPM (R 4.3.0)
 ps               1.7.5      2023-04-18 [1] RSPM (R 4.3.0)
 purrr          * 1.0.2      2023-08-10 [1] RSPM (R 4.3.0)
 quadprog         1.5-8      2019-11-20 [1] RSPM (R 4.3.0)
 QuickJSR         1.0.7      2023-10-15 [1] RSPM (R 4.3.0)
 R6               2.5.1      2021-08-19 [1] RSPM (R 4.3.0)
 Rcpp           * 1.0.11     2023-07-06 [1] RSPM (R 4.3.0)
 RcppParallel     5.1.7      2023-02-27 [1] RSPM (R 4.3.0)
 readr          * 2.1.4      2023-02-10 [1] RSPM (R 4.3.0)
 reshape2         1.4.4      2020-04-09 [1] RSPM (R 4.3.0)
 rlang          * 1.1.1      2023-04-28 [1] RSPM (R 4.3.0)
 rmarkdown        2.25       2023-09-18 [1] RSPM (R 4.3.0)
 rstan            2.32.3     2023-10-15 [1] RSPM (R 4.3.0)
 rstantools       2.3.1.1    2023-07-18 [1] RSPM (R 4.3.0)
 rstudioapi       0.15.0     2023-07-07 [1] RSPM (R 4.3.0)
 scales         * 1.2.1      2022-08-20 [1] RSPM (R 4.3.0)
 sessioninfo      1.2.2      2021-12-06 [1] RSPM (R 4.3.0)
 shiny            1.7.5.1    2023-10-14 [1] RSPM (R 4.3.0)
 shinyjs          2.1.0      2021-12-23 [1] RSPM (R 4.3.0)
 shinystan        2.6.0      2022-03-03 [1] RSPM (R 4.3.0)
 shinythemes      1.2.0      2021-01-25 [1] RSPM (R 4.3.0)
 StanHeaders      2.26.28    2023-09-07 [1] RSPM (R 4.3.0)
 stringi          1.7.12     2023-01-11 [1] RSPM (R 4.3.0)
 stringr        * 1.5.0      2022-12-02 [1] RSPM (R 4.3.0)
 svUnit           1.0.6      2021-04-19 [1] RSPM (R 4.3.0)
 tensorA          0.36.2     2020-11-19 [1] RSPM (R 4.3.0)
 threejs          0.3.3      2020-01-21 [1] RSPM (R 4.3.0)
 tibble         * 3.2.1      2023-03-20 [1] RSPM (R 4.3.0)
 tidybayes      * 3.0.6      2023-08-12 [1] RSPM (R 4.3.0)
 tidyr          * 1.3.0      2023-01-24 [1] RSPM (R 4.3.0)
 tidyselect       1.2.0      2022-10-10 [1] RSPM (R 4.3.0)
 tidyverse      * 2.0.0      2023-02-22 [1] RSPM (R 4.3.0)
 timechange       0.2.0      2023-01-11 [1] RSPM (R 4.3.0)
 tzdb             0.4.0      2023-05-12 [1] RSPM (R 4.3.0)
 utf8             1.2.4      2023-10-22 [1] RSPM (R 4.3.0)
 V8               4.4.0      2023-10-09 [1] RSPM (R 4.3.0)
 vctrs            0.6.4      2023-10-12 [1] RSPM (R 4.3.0)
 vroom            1.6.4      2023-10-02 [1] RSPM (R 4.3.0)
 withr            2.5.1      2023-09-26 [1] RSPM (R 4.3.0)
 xfun             0.40       2023-08-09 [1] RSPM (R 4.3.0)
 xtable           1.8-4      2019-04-21 [1] RSPM (R 4.3.0)
 xts              0.13.1     2023-04-16 [1] RSPM (R 4.3.0)
 yaml             2.3.7      2023-01-23 [1] RSPM (R 4.3.0)
 zoo              1.8-12     2023-04-13 [1] RSPM (R 4.3.0)

 [1] /usr/local/lib/R/site-library
 [2] /usr/local/lib/R/library

──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Code
options(width = 80L)